/* calculate main skewness measure and top and bottom skewness measures */

libname edu '!userprofile\\Dropbox\Education\Replication\Data';

DATA crsp;  /*read all the crsp monthly data with nonmissing return*/
set edu.crsp_data;
IF not missing (RET);
if RET ne 'C';
if RET ne 'B';
year=year(date);
month=month(date);
if 1<=EXCHCD<=3 and SHRCD in (10 11);
naics_3=compress(substr(naics,1,3));
sic_2=floor(siccd/100);
mktcap = abs(prc)*shrout;
run;

/*annualize monthly returns*/

proc sql noprint;
create table annualReturns as
select *,
  count(ret) as nbMonths,
  exp(sum(log(1+ret))) as cumulativeReturn
from crsp
group by permno, year;
select * from annualReturns;
quit;

proc sort data = annualreturns; by permno year month; run;

data annualreturns;
set annualreturns;
by permno year month;
if last.year;
if  nbmonths>=6; /* requires 6 observations in a year */
ann_ret=cumulativeReturn-1;
run;

data annualreturns;
  set annualreturns;
  by permno year;
  lagmktcap = lag(mktcap);
  if first.permno then lagmktcap = .;
run;

/* get # employees, if there are duplicate entries, choose the larger and the most recent */
data emp (keep = lpermno datadate year emp); set edu.crspcompustat_data; year = year(datadate); run; 

proc sort data = emp; by lpermno year descending datadate descending emp; run;
proc sort data = emp nodupkey; by lpermno year; run;

proc sql;
  create table annualreturns as
  select distinct a.*, b.emp
  from annualreturns as a left join emp as b
  on a.permno = b.lpermno and a.year = b.year;
quit;

/* for each permco, only the permno that has the largest market cap will be counted (get rid of multiple share classes) */
proc sort data = annualreturns;
  by permco year descending mktcap;
run;

proc means data = annualreturns noprint;
  by permco year;
  var lagmktcap;
  output out = size sum = lagmktcap;
run;

data annualreturns (drop = lagmktcap);
  set annualreturns;
  by permco year;
  if first.year;
run;

proc sql;
  create table annualreturns as
  select a.*, b.lagmktcap
  from annualreturns as a, size as b
  where a.permco = b.permco and a.year = b.year;
quit;

/* map SIC2 and NAICS3 into major code (use SIC2 before 2005; NAICS3 afterwards) */
proc sql;
  create table temp as
  select a.*, b.major 
  from annualreturns as a left join edu.sic_2_major as b
  on a.sic_2 = b.sic_2;
quit;

data temp;
  set temp;
  if year >= 2005 then delete;
run;

data temp2; set annualreturns; naics3 = input(naics_3, 12.); run;

proc sql;
  create table temp2 as
  select a.*, b.major 
  from temp2 as a left join edu.naics_3_major as b
  on a.naics3 = b.naics_3;
quit;

data temp2 (drop = naics3);
  set temp2;
  if year >= 2005;
run;

data annualreturns_major; set temp; run;
proc append base = annualreturns_major data = temp2; run;

/* calculate main skewness measure */
data annualreturns2;
set annualreturns_major;
if not missing (major);
run;

proc sort data = annualreturns2;
by year major;
run; 

proc univariate data = annualreturns2 vardef = df noprint;
by year major;
var ann_ret;
weight emp;
output out=skewness_2e skew=skew1e;
run;

data edu.industry_skew_major;
set skewness_2e;
run;

/* top and bottom measures */
data annualreturns2;
set annualreturns_major;
if not missing (major);
if ann_ret <.Z then delete;
run;

%macro skew2major(num,size);
proc sort data = annualreturns2;
by year major;
run; 

proc means data = annualreturns2 noprint;
by year major;
var lagmktcap;
output out=mktcap median=mktcap_50;
run;

proc sql;
create table annualreturns2 as
select a.*, b.mktcap_50
from annualreturns2 as a, mktcap as b
where a.year = b.year and a.major = b.major;
quit;

data return50;
  set annualreturns2;
  if lagmktcap < mktcap_&size then delete; /* drop bottom 50th percentile */
  if lagmktcap = . then delete;
run;

proc sort data = return50;
  by year major descending ann_ret;
run;

proc means data = return50 noprint;
  by year major;
  var ann_ret;
  output out = med_50 median = med_50; /* calculate median return */
run;

proc means data = return50 noprint;
  by year major;
  var ann_ret;
  output out = med_502 p90 = p90_50 p10 = p10_50; /* calculate percentile returns */
run;

data return50t;
  set return50;
  by year major;
  retain rank_old;
  if first.major then do; rank = 1; rank_old = 0; end;
  rank = rank_old + 1;
  rank_old = rank;
run;

data return50t;
  set return50t;
  if rank <= &num; /* top &num */
run;

proc sql;
  create table return50t as
  select a.*, b.med_50
  from return50t as a, med_50 as b
  where a.year = b.year and a.major = b.major;
quit;

proc sql;
  create table return50t as
  select a.*, b.p90_50, b.p10_50
  from return50t as a, med_502 as b
  where a.year = b.year and a.major = b.major;
quit;

data return50t;
  set return50t;
  ret50t = ann_ret - med_50; /* top minus median */
  if ret50t < 0 then delete;
run;

proc means data = return50t noprint;
  by year major;
  var ret50t p90_50 p10_50;
  output out = top_50 mean = retmed1 p90_50 p10_50;
run;

proc sort data = return50;
  by year major ann_ret;
run;

data return50b;
  set return50;
  by year major;
  retain rank_old;
  if first.major then do; rank = 1; rank_old = 0; end;
  rank = rank_old + 1;
  rank_old = rank;
run;

data return50b;
  set return50b;
  if rank <= &num; /* bottom &num */
run;

proc sql;
  create table return50b as
  select a.*, b.med_50
  from return50b as a, med_50 as b
  where a.year = b.year and a.major = b.major;
quit;

data return50b;
  set return50b;
  ret50b = ann_ret - med_50; /* bottom minus median */
  if ret50b > 0 then delete;
run;


proc means data = return50b noprint;
  by year major;
  var ret50b;
  output out = bot_50 mean = retmed2;
run;


proc sql;
  create table temp as
  select a.*, b.retmed1, b.p90_50, b.p10_50
  from edu.industry_skew_major as a left join top_50 as b
  on a.year = b.year and a.major = b.major;
quit;

proc sql;
  create table temp as
  select a.*, b.retmed2
  from temp as a left join bot_50 as b
  on a.year = b.year and a.major = b.major;
quit;

data edu.industry_skew_major (drop = retmed1 retmed2 p90_50 p10_50);
  set temp;
  ret&size.t_&num = retmed1/(p90_50 - p10_50);
  ret&size.b_&num = retmed2/(p90_50 - p10_50);
  ret&size.tb_&num = ret&size.t_&num + ret&size.b_&num;
run;

%mend skew2major;

%skew2major(10,50);
